{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "#######################################################################################################\n",
    "# Summary\n",
    "# 1. Keras Multi-GPU example\n",
    "#######################################################################################################"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "MULTI_GPU = True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/anaconda/envs/py35/lib/python3.5/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
      "  from ._conv import register_converters as _register_converters\n",
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import sys\n",
    "import time\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "os.environ['KERAS_BACKEND'] = \"tensorflow\"\n",
    "import keras as K \n",
    "import tensorflow\n",
    "import multiprocessing\n",
    "import random\n",
    "from keras.preprocessing.image import ImageDataGenerator\n",
    "from keras.applications.densenet import DenseNet121\n",
    "from keras.applications.imagenet_utils import preprocess_input\n",
    "from keras.optimizers import Adam\n",
    "from keras.callbacks import ReduceLROnPlateau, Callback, ModelCheckpoint\n",
    "from keras.layers import Dense\n",
    "from keras.models import Model\n",
    "from keras.utils import multi_gpu_model\n",
    "from sklearn.metrics.ranking import roc_auc_score\n",
    "from sklearn.model_selection import train_test_split\n",
    "from PIL import Image\n",
    "from common.utils import download_data_chextxray, get_imgloc_labels, get_train_valid_test_split\n",
    "from common.utils import compute_roc_auc, get_cuda_version, get_cudnn_version, get_gpu_name\n",
    "from common.params_dense import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "OS:  linux\n",
      "Python:  3.5.4 |Anaconda custom (64-bit)| (default, Nov 20 2017, 18:44:38) \n",
      "[GCC 7.2.0]\n",
      "Keras:  2.1.4\n",
      "Numpy:  1.14.1\n",
      "Tensorflow:  1.8.0\n",
      "tensorflow\n",
      "channels_last\n",
      "GPU:  ['Tesla V100-PCIE-16GB', 'Tesla V100-PCIE-16GB', 'Tesla V100-PCIE-16GB', 'Tesla V100-PCIE-16GB']\n",
      "CUDA Version 9.1.85\n",
      "CuDNN Version  7.0.5\n"
     ]
    }
   ],
   "source": [
    "print(\"OS: \", sys.platform)\n",
    "print(\"Python: \", sys.version)\n",
    "print(\"Keras: \", K.__version__)\n",
    "print(\"Numpy: \", np.__version__)\n",
    "print(\"Tensorflow: \", tensorflow.__version__)\n",
    "print(K.backend.backend())\n",
    "print(K.backend.image_data_format())\n",
    "print(\"GPU: \", get_gpu_name())\n",
    "print(get_cuda_version())\n",
    "print(\"CuDNN Version \", get_cudnn_version())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "if K.__version__ != \"2.1.4\":\n",
    "    raise Exception(\"Keras 2.1.5 introduces some breaking changes for data-loader\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPUs:  24\n",
      "GPUs:  4\n"
     ]
    }
   ],
   "source": [
    "CPU_COUNT = multiprocessing.cpu_count()\n",
    "GPU_COUNT = len(get_gpu_name())\n",
    "print(\"CPUs: \", CPU_COUNT)\n",
    "print(\"GPUs: \", GPU_COUNT)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "chestxray/images chestxray/Data_Entry_2017.csv\n"
     ]
    }
   ],
   "source": [
    "# Model-params\n",
    "# Normalising done by keras.applications.densenet.preprocess_input()\n",
    "# Paths\n",
    "CSV_DEST = \"chestxray\"\n",
    "IMAGE_FOLDER = os.path.join(CSV_DEST, \"images\")\n",
    "LABEL_FILE = os.path.join(CSV_DEST, \"Data_Entry_2017.csv\")\n",
    "print(IMAGE_FOLDER, LABEL_FILE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Manually scale to multi-gpu\n",
    "if MULTI_GPU:\n",
    "    LR *= GPU_COUNT \n",
    "    BATCHSIZE *= GPU_COUNT\n",
    "#Make sure channels-first (not last)\n",
    "K.backend.set_image_data_format('channels_first')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Please make sure to download\n",
      "https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azcopy-linux#download-and-install-azcopy\n",
      "Data already exists\n",
      "CPU times: user 549 ms, sys: 217 ms, total: 766 ms\n",
      "Wall time: 765 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Download data\n",
    "print(\"Please make sure to download\")\n",
    "print(\"https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azcopy-linux#download-and-install-azcopy\")\n",
    "download_data_chextxray(CSV_DEST)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "#####################################################################################################\n",
    "## Data Loading"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "class XrayData():\n",
    "    \n",
    "    def __init__(self, img_dir, lbl_file, patient_ids, \n",
    "                 width=WIDTH, height=HEIGHT, batch_size=BATCHSIZE, num_classes=CLASSES,\n",
    "                 shuffle=True, seed=None, augment=False):\n",
    "        \n",
    "        self.patient_ids = patient_ids\n",
    "        self.lbl_file = lbl_file\n",
    "        self.width = width\n",
    "        self.height = height\n",
    "        \n",
    "        # Hack for flow_from_directory to work, give it path above\n",
    "        # Otherwise it requires images to be kept in folders\n",
    "        self.child_path  = os.path.split(img_dir)[-1]\n",
    "        self.parent_path =  img_dir.replace(self.child_path,'')\n",
    "        \n",
    "        # With version 2.1.5 the input has become a picture not array\n",
    "        if augment:\n",
    "            datagen = ImageDataGenerator(\n",
    "                horizontal_flip=True,\n",
    "                preprocessing_function=self.preprocess_fn_augment)\n",
    "        else:\n",
    "            datagen = ImageDataGenerator(\n",
    "                preprocessing_function=self._preprocess_fn)   \n",
    "\n",
    "        # Create flow-from-directory\n",
    "        flowgen = datagen.flow_from_directory(\n",
    "            directory=self.parent_path,  # hack: this is one directory up\n",
    "            target_size=(width, height),\n",
    "            batch_size=batch_size,\n",
    "            shuffle=shuffle,\n",
    "            seed=seed,\n",
    "            class_mode='binary') # this can be none since overwritten   \n",
    "        \n",
    "        # Override previously created classes variables\n",
    "        # filenames, classes\n",
    "        flowgen.filenames, flowgen.classes = get_imgloc_labels(\n",
    "            self.child_path, lbl_file, patient_ids)\n",
    "        # number of files\n",
    "        flowgen.n = len(flowgen.filenames)\n",
    "        flowgen.num_classes = num_classes\n",
    "        \n",
    "        self.generator = flowgen\n",
    "        print(\"Loaded {} labels and {} images\".format(\n",
    "            len(self.generator.classes), len(self.generator.filenames)))\n",
    "        \n",
    "    def preprocess_fn_augment(self, x):\n",
    "        return self._preprocess_fn(x, augment=True)\n",
    "    \n",
    "    def _preprocess_fn(self, x, augment=False):\n",
    "        # K 2.1.4 and below return CHW array\n",
    "        # k 2.1.5 onwards returns an image\n",
    "        x = preprocess_input(x, data_format='channels_first', mode='torch')\n",
    "        # Data augmentation\n",
    "        if augment:\n",
    "            x = random_crop(x, (self.height, self.width))\n",
    "        return x\n",
    "                        \n",
    "# Random crop has to be appied with preprocessing function\n",
    "def random_crop(img, size):\n",
    "    \"\"\"\n",
    "    Args:\n",
    "        img (~numpy.ndarray): An image array to be cropped. This is in\n",
    "            CHW format.\n",
    "        size (tuple): The size of output image after cropping.\n",
    "            This value is :math:`(height, width)`.\n",
    "    \"\"\"\n",
    "    H, W = size\n",
    "    y_offset = random.randint(0, img.shape[1] - H)\n",
    "    y_slice = slice(y_offset, y_offset + H)\n",
    "    x_offset = random.randint(0, img.shape[2] - W)\n",
    "    x_slice = slice(x_offset, x_offset + W)\n",
    "    img = img[:,y_slice, x_slice]\n",
    "    return img  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "train:21563 valid:3080 test:6162\n"
     ]
    }
   ],
   "source": [
    "train_set, valid_set, test_set = get_train_valid_test_split(TOT_PATIENT_NUMBER)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Found 112120 images belonging to 1 classes.\n",
      "Loaded 87306 labels and 87306 images\n"
     ]
    }
   ],
   "source": [
    "train_dataset = XrayData(IMAGE_FOLDER, LABEL_FILE, train_set, augment=True).generator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Found 112120 images belonging to 1 classes.\n",
      "Loaded 7616 labels and 7616 images\n",
      "Found 112120 images belonging to 1 classes.\n",
      "Loaded 17198 labels and 17198 images\n"
     ]
    }
   ],
   "source": [
    "valid_dataset = XrayData(IMAGE_FOLDER, LABEL_FILE, valid_set, shuffle=False).generator\n",
    "test_dataset = XrayData(IMAGE_FOLDER, LABEL_FILE, test_set, shuffle=False).generator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "#####################################################################################################\n",
    "## Helper Functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_symbol(out_features=CLASSES):\n",
    "    model = DenseNet121(include_top=False, weights='imagenet', \n",
    "                        input_shape=(3, 224, 224), pooling='avg')\n",
    "    # Add classifier to model FC-14\n",
    "    classifier = Dense(out_features, activation='sigmoid')(model.output)\n",
    "    model = Model(inputs=model.input, outputs=classifier)\n",
    "    return model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "def init_symbol(sym, lr=LR):\n",
    "    # BCE Loss since classes not mutually exclusive + Sigmoid FC-layer\n",
    "    sym.compile(\n",
    "        loss = \"binary_crossentropy\",\n",
    "        optimizer = Adam(lr, beta_1=0.9, beta_2=0.999, epsilon=None))\n",
    "    # Callbacks\n",
    "    sch = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=1)\n",
    "    #This doesnt work with Keras multi-gpu\n",
    "    #FLAG: Check if fixed in future versions\n",
    "    #chp = ModelCheckpoint('best_chexnet.pth.hdf5', monitor='val_loss', save_weights_only=False)\n",
    "    callbacks = [sch]\n",
    "    return sym, callbacks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "#####################################################################################################\n",
    "## Train CheXNet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 1min 20s, sys: 43.1 s, total: 2min 3s\n",
      "Wall time: 2min 13s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "if MULTI_GPU:\n",
    "    with tensorflow.device('/cpu:0'):\n",
    "        # Recommended to instantiate base model on CPU\n",
    "        # https://keras.io/utils/#multi_gpu_model\n",
    "        sym = get_symbol()\n",
    "    chexnet_sym = multi_gpu_model(sym, gpus=GPU_COUNT)\n",
    "else:\n",
    "    chexnet_sym = get_symbol()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 39.9 ms, sys: 0 ns, total: 39.9 ms\n",
      "Wall time: 39.1 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Load optimiser, loss\n",
    "model, callbacks = init_symbol(chexnet_sym)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/5\n",
      "342/342 [==============================] - 285s 832ms/step - loss: 0.1741 - val_loss: 0.1606\n",
      "Epoch 2/5\n",
      "342/342 [==============================] - 185s 540ms/step - loss: 0.1495 - val_loss: 0.1457\n",
      "Epoch 3/5\n",
      "342/342 [==============================] - 188s 548ms/step - loss: 0.1457 - val_loss: 0.1514\n",
      "Epoch 4/5\n",
      "342/342 [==============================] - 187s 546ms/step - loss: 0.1427 - val_loss: 0.1436\n",
      "Epoch 5/5\n",
      "342/342 [==============================] - 186s 543ms/step - loss: 0.1393 - val_loss: 0.1431\n",
      "CPU times: user 1h 16min 34s, sys: 16min 23s, total: 1h 32min 58s\n",
      "Wall time: 17min 50s\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x7fe079754208>"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "# 1 GPU - Main training loop: 37min 56s\n",
    "# 4 GPU - Main training loop: 17min 50s\n",
    "model.fit_generator(train_dataset,\n",
    "                    epochs=EPOCHS,\n",
    "                    verbose=1,\n",
    "                    callbacks=callbacks,\n",
    "                    workers=2*CPU_COUNT,  # Num of CPUs if multiprocessing\n",
    "                    use_multiprocessing=False,  # Faster than with threading\n",
    "                    validation_data=valid_dataset,\n",
    "                    max_queue_size=20)  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "#####################################################################################################\n",
    "## Test CheXNet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load model for testing\n",
    "# Currently multi-GPU checkpointing is broken on Keras\n",
    "# For now use in-RAM model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 2min 2s, sys: 25.9 s, total: 2min 27s\n",
      "Wall time: 40.5 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "## Evaluate\n",
    "y_guess = model.predict_generator(test_dataset, workers=CPU_COUNT)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Full AUC [0.7818915697410709, 0.8686046483980264, 0.794963619139759, 0.8838625230383254, 0.8667650695266846, 0.9254906191635508, 0.710375794879198, 0.8892920496694081, 0.6243538394826096, 0.838812605511231, 0.7336969085192947, 0.7971210770715629, 0.7608787696849162, 0.8811401858032785]\n",
      "Validation AUC: 0.8112\n"
     ]
    }
   ],
   "source": [
    "# 1 GPU AUC: 0.8146\n",
    "# 4 GPU AUC: 0.8112\n",
    "print(\"Validation AUC: {0:.4f}\".format(compute_roc_auc(test_dataset.classes, y_guess, CLASSES)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "#####################################################################################################\n",
    "## Synthetic Data (Pure Training)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Test on fake-data -> no IO lag\n",
    "batch_in_epoch = train_dataset.n//BATCHSIZE\n",
    "tot_num = batch_in_epoch * BATCHSIZE\n",
    "fake_X = np.random.rand(tot_num, 3, 224, 224).astype(np.float32)\n",
    "fake_y = np.random.rand(tot_num, CLASSES).astype(np.float32) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/5\n",
      "87296/87296 [==============================] - 188s 2ms/step - loss: 0.7236\n",
      "Epoch 2/5\n",
      "87296/87296 [==============================] - 185s 2ms/step - loss: 0.6931\n",
      "Epoch 3/5\n",
      "87296/87296 [==============================] - 185s 2ms/step - loss: 0.6927\n",
      "Epoch 4/5\n",
      "87296/87296 [==============================] - 185s 2ms/step - loss: 0.6917\n",
      "Epoch 5/5\n",
      "87296/87296 [==============================] - 184s 2ms/step - loss: 0.6898\n",
      "CPU times: user 51min 33s, sys: 14min 5s, total: 1h 5min 39s\n",
      "Wall time: 15min 26s\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x7fdfe06bbda0>"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "# 1 GPU - Synthetic data: 35min 51s \n",
    "# 4 GPU - Synthetic data: 15min 26s\n",
    "model.fit(fake_X,\n",
    "          fake_y,\n",
    "          batch_size=BATCHSIZE,\n",
    "          epochs=EPOCHS,\n",
    "          verbose=1)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
